First go through coding examples for two causal inference methods:
Then go through some..
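In a randomized experiment, we have randomization R that determines
treatment X. So even if we have endogeneity via
W, we can identify X -> Y.
Instrumental variables uses a variable Z that takes the place
of R: explain X with Z, and keep only what
is explained, X'; explain Y with Z, and keep only what
is explained, Y'.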
Most commonly this is estimated using two stage least squares
Many ways to do this in R, I’ll be doing 2SLS with
feols() from fixest
library(AER)     # provides the USConsump1993 data set
library(dplyr)   # %>%, mutate(), lag()
library(fixest)  # feols()

# US income and consumption data, 1950-1993
data(USConsump1993)
USC93 <- as.data.frame(USConsump1993)

# Instrument: last year's investment (income minus expenditure).
# dplyr::lag() returns the observation one row above; here that row is the
# previous year. It is namespaced on purpose: stats::lag() does not shift a
# plain vector. The first year has no predecessor, so its value is NA.
IV <- USC93 %>%
  mutate(lastyr.invest = dplyr::lag(income) - dplyr::lag(expenditure))

# 2SLS estimation: income is instrumented with lastyr.invest and there are
# no exogenous controls (the `~ 1` part). vcov = "hetero" requests
# heteroskedasticity-robust standard errors; it replaces the deprecated
# `se = "hetero"` spelling and yields the same estimator.
m_iv <- feols(
  expenditure ~ 1 | income ~ lastyr.invest,
  data = IV,
  vcov = "hetero"
)

| Income (First Stage) | Expenditure | |
|---|---|---|
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | ||
| Income (2SLS) | 0.892*** | |
| (0.009) | ||
| Lagged Investment | 8.210*** | |
| (0.620) | ||
| Num.Obs. | 43 | 43 |
| Std.Errors | Heteroskedasticity-robust | Heteroskedasticity-robust |
* Read the US consumption data, replacing whatever is in memory
import delimited using "data/usconsump1993.csv", clear
* Build a year index (first row = 1950) and declare it as the time variable
* so that lag operators can be used
generate year = 1949 + _n
tsset year
* Lagged investment: previous year's income minus previous year's expenditure
generate lastyr_invest = L1.income - L1.expenditure
* 2SLS: expenditure on income, with income instrumented by lagged investment
* and robust standard errors
ivregress 2sls expenditure (income = lastyr_invest), vce(robust)
data(injury) from
library(wooldridge)
data(injury, package = "wooldridge")

# Keep the Kentucky observations and build the DID term as the product of
# the after-change indicator and the high-earner indicator.
injury <- mutate(
  filter(injury, ky == 1),
  Treated = afchnge * highearn
)

# Two-way fixed effects DID: group (highearn) and period (afchnge) enter as
# fixed effects, so the coefficient on Treated is the DID estimate.
m1_did <- feols(ldurat ~ Treated | highearn + afchnge, data = injury)

# Regression table with significance stars; the listed goodness-of-fit rows
# are dropped from the output.
msummary(
  m1_did,
  stars = TRUE,
  gof_omit = "FE|RMSE|R2|AIC|BIC|Lik|Adj|Pseudo"
)

| (1) | |
|---|---|
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 | |
| Treated | 0.191** |
| (0.069) | |
| Num.Obs. | 5626 |
| Std.Errors | IID |
library(dplyr)
library(fixest)
library(ggplot2)
library(readr)
# Read the EITC data, flag treated units (at least one child) as 0/1, and
# turn year into a factor so it can be interacted with treatment.
df <- read_csv("data/eitc.csv") %>%
  mutate(
    treated = 1 * (children > 0),
    year = factor(year)
  )

# The reference period used below must exist as a level of year.
stopifnot("1993" %in% levels(df$year))

# Dynamic DID: one treated-by-year coefficient per year, with 1993 omitted
# as the reference, plus treated and year fixed effects.
m <- feols(work ~ i(year, treated, ref = "1993") | treated + year, data = df)

# NOTE(review): ggcoefplot() is not exported by fixest; it appears to come
# from the ggfixest package, which is not attached in the visible code.
# Confirm it is loaded elsewhere.
# `ref = c("1993" = 3)` adds the omitted reference year as a zero point in
# the third position; pt.join connects the point estimates.
coef_plot <- ggcoefplot(m, ref = c("1993" = 3), pt.join = TRUE) +
  labs(
    title = "Dynamic Difference-in-Differences Estimates of EITC on Work",
    x = "Year",
    y = "Coefficient Estimate (ref: 1993)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 24),
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 18)
  )
feols() in R)